{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8efcc7cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "#数据库\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "from openpyxl import workbook \n",
    "from openpyxl import load_workbook\n",
    "def getHtmlText(url, user_agent, timeout=30, encoding='utf-8'):\n",
    "    \"\"\"Download a page and return its body as text.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the page to fetch.\n",
    "        user_agent: Value sent in the ``User-Agent`` request header.\n",
    "        timeout: Seconds passed to ``requests.get`` as its timeout.\n",
    "        encoding: Encoding forced onto the response before decoding it.\n",
    "\n",
    "    Returns:\n",
    "        The decoded response body, or an empty string when the request\n",
    "        fails (connection problem, timeout, invalid URL or error status).\n",
    "    \"\"\"\n",
    "    try:\n",
    "        r = requests.get(url, headers={'User-Agent': user_agent}, timeout=timeout)\n",
    "        r.raise_for_status()\n",
    "    except requests.RequestException:\n",
    "        # Best effort: request failures yield an empty string instead of raising.\n",
    "        # Anything else (programming errors, KeyboardInterrupt) still propagates.\n",
    "        return \"\"\n",
    "    r.encoding = encoding\n",
    "    return r.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a5112c47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://www.maigoo.com/news/660677.html\n"
     ]
    }
   ],
   "source": [
    "# Position of the follow-up link within the page's target=\"_blank\" anchors.\n",
    "LINK_INDEX = 28\n",
    "\n",
    "# Read the start URL; strip surrounding whitespace such as a trailing newline\n",
    "# so it does not end up inside the request URL.\n",
    "with open('d:\\\\books\\\\zuoye.txt', 'r') as f:\n",
    "    url = f.read().strip()\n",
    "\n",
    "# Fetch the start page and collect the href of every target=\"_blank\" anchor.\n",
    "user_agent = \"Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0\"\n",
    "r = getHtmlText(url, user_agent)\n",
    "soup = BeautifulSoup(r, 'html.parser')\n",
    "links = [anchor.get('href') for anchor in soup.find_all('a', target=\"_blank\")]\n",
    "\n",
    "# getHtmlText returns '' on failure, which leaves `links` empty; report that\n",
    "# clearly instead of failing with a bare 'list index out of range'.\n",
    "if len(links) <= LINK_INDEX:\n",
    "    raise IndexError(\n",
    "        f'expected more than {LINK_INDEX} target=_blank links, found {len(links)}'\n",
    "    )\n",
    "url2 = links[LINK_INDEX]\n",
    "if not url2:\n",
    "    raise ValueError(f'link number {LINK_INDEX} has no href')\n",
    "print(url2)\n",
    "\n",
    "# Save the new URL. NOTE: the file is opened in append mode and no separator\n",
    "# is written, so repeated runs concatenate the URLs in the file.\n",
    "with open(\"d:\\\\books\\\\cunqu.txt\", 'a', encoding='utf-8') as f:\n",
    "    f.write(url2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cb0e1e5a",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Collect the table rows of the first page, one list of cell texts per <tr>.\n",
    "trs = soup.find_all('tr')\n",
    "urli = []\n",
    "for tr in trs:\n",
    "    # Look only at the row's own <td>/<th> cells. get_text() is used instead of\n",
    "    # .string because .string is None for a cell that wraps several child nodes,\n",
    "    # and calling .strip() on None raises AttributeError.\n",
    "    cell_texts = (\n",
    "        cell.get_text(strip=True)\n",
    "        for cell in tr.find_all(['td', 'th'], recursive=False)\n",
    "    )\n",
    "    # Cells that are empty after stripping are dropped.\n",
    "    urli.append([text for text in cell_texts if text])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "720579e9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['排名', '地区', '2022年GDP（亿元）'],\n",
       " ['1', '广东省', '129118.6'],\n",
       " ['2', '江苏省', '122875.6'],\n",
       " ['3', '山东省', '87435.1'],\n",
       " ['4', '浙江省', '77715.4'],\n",
       " ['5', '河南省', '61345.1'],\n",
       " ['6', '四川省', '56749.8'],\n",
       " ['7', '湖北省', '53734.9'],\n",
       " ['8', '福建省', '53109.9'],\n",
       " ['9', '湖南省', '48670.4'],\n",
       " ['10', '安徽省', '45045'],\n",
       " ['11', '上海市', '44652.8'],\n",
       " ['12', '河北省', '42370.4'],\n",
       " ['13', '北京市', '41611'],\n",
       " ['14', '陕西省', '32772.7'],\n",
       " ['15', '江西省', '32074.7'],\n",
       " ['16', '重庆市', '29129'],\n",
       " ['17', '辽宁省', '28975.1'],\n",
       " ['18', '云南省', '28954.2'],\n",
       " ['19', '广西壮族自治区', '26300.9'],\n",
       " ['20', '山西省', '25642.6'],\n",
       " ['21', '内蒙古自治区', '23158.7'],\n",
       " ['22', '贵州省', '20164.6'],\n",
       " ['23', '新疆维吾尔自治区', '17741.3'],\n",
       " ['24', '天津市', '16311.3'],\n",
       " ['25', '黑龙江省', '15901'],\n",
       " ['26', '吉林省', '13070.2'],\n",
       " ['27', '甘肃省', '11201.6'],\n",
       " ['28', '海南省', '6818.2'],\n",
       " ['29', '宁夏回族自治区', '5069.6'],\n",
       " ['30', '青海省', '3610.1'],\n",
       " ['31', '西藏自治区', '2132.6'],\n",
       " ['排名', '省市', '2022年GDP增速（%）'],\n",
       " ['1', '福建', '4.7'],\n",
       " ['2', '江西', '4.7'],\n",
       " ['3', '湖南', '4.5'],\n",
       " ['4', '甘肃', '4.5'],\n",
       " ['5', '山西', '4.4'],\n",
       " ['6', '湖北', '4.3'],\n",
       " ['7', '陕西', '4.3'],\n",
       " ['8', '云南', '4.3'],\n",
       " ['9', '广西', '4.2'],\n",
       " ['10', '内蒙古', '4.2'],\n",
       " ['11', '宁夏', '4'],\n",
       " ['12', '山东', '3.9'],\n",
       " ['13', '河北', '3.8'],\n",
       " ['14', '安徽', '3.5'],\n",
       " ['15', '新疆', '3.2'],\n",
       " ['16', '浙江', '3.1'],\n",
       " ['17', '河南', '3.1'],\n",
       " ['18', '四川', '2.9'],\n",
       " ['19', '江苏', '2.8'],\n",
       " ['20', '黑龙江', '2.7'],\n",
       " ['21', '重庆', '2.6'],\n",
       " ['22', '青海', '2.3'],\n",
       " ['23', '辽宁', '2.1'],\n",
       " ['24', '广东', '1.9'],\n",
       " ['25', '贵州', '1.2'],\n",
       " ['26', '西藏', '1.1'],\n",
       " ['27', '天津', '1'],\n",
       " ['28', '北京', '0.7'],\n",
       " ['29', '海南', '0.2'],\n",
       " ['30', '上海', '-0.2'],\n",
       " ['31', '吉林', '-1.9']]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the rows scraped from the first page.\n",
    "urli"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a1ab51a4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     0    1             2\n",
      "0   排名   地区  2022年GDP（亿元）\n",
      "1    1  广东省      129118.6\n",
      "2    2  江苏省      122875.6\n",
      "3    3  山东省       87435.1\n",
      "4    4  浙江省       77715.4\n",
      "..  ..  ...           ...\n",
      "59  27   天津             1\n",
      "60  28   北京           0.7\n",
      "61  29   海南           0.2\n",
      "62  30   上海          -0.2\n",
      "63  31   吉林          -1.9\n",
      "\n",
      "[64 rows x 3 columns]\n"
     ]
    }
   ],
   "source": [
    "# Build a DataFrame from the rows of the first page. The page's header rows\n",
    "# stay in the data, so the columns carry pandas' default integer labels.\n",
    "z = pd.DataFrame(urli)\n",
    "# Leave the frame as the last expression so the notebook renders it as a table.\n",
    "z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a3a2bf97",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the first page's data to Sheet1 of the Excel workbook (row index omitted).\n",
    "z.to_excel('GDP排名.xlsx', sheet_name='Sheet1', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e856d31d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch and parse the second page, whose address is held in url2.\n",
    "r2 = getHtmlText(url2, user_agent)\n",
    "soup2 = BeautifulSoup(r2, 'html.parser')\n",
    "trs2 = soup2.find_all('tr')\n",
    "\n",
    "# Collect its table rows, one list of cell texts per <tr>.\n",
    "urli2 = []\n",
    "for tr in trs2:\n",
    "    # Look only at the row's own <td>/<th> cells. get_text() is used instead of\n",
    "    # .string because .string is None for a cell that wraps several child nodes,\n",
    "    # and calling .strip() on None raises AttributeError.\n",
    "    cell_texts = (\n",
    "        cell.get_text(strip=True)\n",
    "        for cell in tr.find_all(['td', 'th'], recursive=False)\n",
    "    )\n",
    "    # Cells that are empty after stripping are dropped.\n",
    "    urli2.append([text for text in cell_texts if text])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7ecb1712",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['排名', '省市', '城市', 'GDP（亿元）', '实际增速（%）'],\n",
       " ['1', '上海', '上海', '44652.8', '-0.2%'],\n",
       " ['2', '北京', '北京', '41610.9', '0.7%'],\n",
       " ['3', '广东', '深圳', '32387.68', '3.3%'],\n",
       " ['4', '重庆', '重庆', '29129.03', '2.6%'],\n",
       " ['5', '广东', '广州', '28839', '1.0%'],\n",
       " ['6', '江苏', '苏州', '23958.3', '2.0%'],\n",
       " ['7', '四川', '成都', '20817.5', '2.8%'],\n",
       " ['8', '湖北', '武汉', '18866.43', '4.0%'],\n",
       " ['9', '浙江', '杭州', '18753', '1.5%'],\n",
       " ['10', '江苏', '南京', '16907.85', '2.1%'],\n",
       " ['11', '天津', '天津', '16311.34', '1.0%'],\n",
       " ['12', '浙江', '宁波', '15704.3', '3.5%'],\n",
       " ['13', '山东', '青岛', '14920.75', '3.9%'],\n",
       " ['14', '江苏', '无锡', '14850.82', '3.0%'],\n",
       " ['15', '湖南', '长沙', '13966.11', '4.5%'],\n",
       " ['16', '河南', '郑州', '12934.7', '1.0%'],\n",
       " ['17', '广东', '佛山', '12698.39', '2.1%'],\n",
       " ['18', '福建', '福州', '12308.23', '4.4%'],\n",
       " ['19', '福建', '泉州', '12102.97', '3.5%'],\n",
       " ['20', '山东', '济南', '12027.5', '3.1%'],\n",
       " ['21', '安徽', '合肥', '12013.1', '3.5%'],\n",
       " ['22', '陕西', '西安', '11486.51', '4.4%'],\n",
       " ['23', '江苏', '南通', '11379.6', '2.1%'],\n",
       " ['24', '广东', '东莞', '11200.32', '0.6%'],\n",
       " ['25', '江苏', '常州', '9550.1', '3.5%'],\n",
       " ['26', '山东', '烟台', '9515.86', '5.1%'],\n",
       " ['27', '河北', '唐山', '8900.7', '4.7%'],\n",
       " ['28', '江苏', '徐州', '8457.84', '3.2%'],\n",
       " ['29', '辽宁', '大连', '8430.9', '4.0%'],\n",
       " ['30', '浙江', '温州', '8029.8', '3.7%'],\n",
       " ['31', '福建', '厦门', '7802.66', '4.4%'],\n",
       " ['32', '辽宁', '沈阳', '7695.8', '3.5%'],\n",
       " ['33', '云南', '昆明', '7541.37', '3.0%'],\n",
       " ['34', '浙江', '绍兴', '7351', '4.4%'],\n",
       " ['35', '山东', '潍坊', '7306.45', '3.7%'],\n",
       " ['36', '江西', '南昌', '7203.5', '4.1%'],\n",
       " ['37', '江苏', '扬州', '7104.98', '4.3%'],\n",
       " ['38', '河北', '石家庄', '7100.6', '6.4%'],\n",
       " ['39', '江苏', '盐城', '7079.8', '4.6%'],\n",
       " ['40', '吉林', '长春', '6744.56', '-4.5%'],\n",
       " ['41', '浙江', '嘉兴', '6739.45', '2.5%'],\n",
       " ['42', '陕西', '榆林', '6543.65', '5.6%'],\n",
       " ['43', '江苏', '泰州', '6401.77', '4.4%'],\n",
       " ['44', '浙江', '台州', '6040.72', '2.7%'],\n",
       " ['45', '湖北', '襄阳', '5827.81', '5.4%'],\n",
       " ['46', '河南', '洛阳', '5800', '/'],\n",
       " ['47', '山东', '临沂', '5778.5', '4.2%'],\n",
       " ['48', '福建', '漳州', '5706.58', '6.9%'],\n",
       " ['49', '内蒙古', '鄂尔多斯', '5613.44', '5.4%'],\n",
       " ['50', '山西', '太原', '5571.17', '3.3%'],\n",
       " ['51', '浙江', '金华', '5562.47', '2.5%'],\n",
       " ['52', '湖北', '宜昌', '5502.69', '5.5%'],\n",
       " ['53', '黑龙江', '哈尔滨', '5490.1', '2.5%'],\n",
       " ['54', '广东', '惠州', '5401.24', '4.2%'],\n",
       " ['55', '山东', '济宁', '5316.9', '4.4%'],\n",
       " ['56', '广西', '南宁', '5218.34', '1.4%'],\n",
       " ['57', '江苏', '镇江', '5017.04', '2.9%'],\n",
       " ['58', '贵州', '贵阳', '4921.17', '2.0%'],\n",
       " ['59', '江苏', '淮安', '4742.42', '3.6%'],\n",
       " ['60', '湖南', '岳阳', '4710.67', '5.4%'],\n",
       " ['61', '河南', '南阳', '4555.4', '4.8%'],\n",
       " ['62', '江西', '赣州', '4523.63', '5.2%'],\n",
       " ['63', '安徽', '芜湖', '4502.13', '4.1%'],\n",
       " ['64', '山东', '淄博', '4402.6', '4.7%'],\n",
       " ['65', '贵州', '遵义', '4401.26', '3.1%'],\n",
       " ['66', '河北', '沧州', '4388.2', '4.2%'],\n",
       " ['67', '河北', '邯郸', '4346.3', '4.2%'],\n",
       " ['68', '湖南', '常德', '4274.5', '4.5%'],\n",
       " ['69', '山东', '菏泽', '4205.34', '4.2%'],\n",
       " ['70', '江苏', '宿迁', '4111.98', '3.6%'],\n",
       " ['71', '湖南', '衡阳', '4089.69', '5.2%'],\n",
       " ['72', '广东', '珠海', '4045.45', '2.3%'],\n",
       " ['73', '江西', '九江', '4026.6', '4.3%'],\n",
       " ['74', '江苏', '连云港', '4005', '2.4%'],\n",
       " ['75', '广东', '茂名', '3904.63', '0.5%'],\n",
       " ['76', '新疆', '乌鲁木齐', '3893', '0.3%'],\n",
       " ['77', '河北', '保定', '3880.3', '3.8%'],\n",
       " ['78', '浙江', '湖州', '3850', '3.3%'],\n",
       " ['79', '云南', '曲靖', '3802.2', '8.1%'],\n",
       " ['80', '广东', '江门', '3773.41', '3.3%'],\n",
       " ['81', '内蒙古', '包头', '3749.9', '7.2%'],\n",
       " ['82', '河南', '许昌', '3746.8', '1.8%'],\n",
       " ['83', '广东', '湛江', '3712.56', '1.2%'],\n",
       " ['84', '山东', '德州', '3633.1', '4.4%'],\n",
       " ['85', '广东', '中山', '3631.28', '0.5%'],\n",
       " ['86', '四川', '绵阳', '3626.94', '5.0%'],\n",
       " ['87', '山东', '东营', '3620.74', '4.3%'],\n",
       " ['88', '河南', '周口', '3616.99', '2.9%'],\n",
       " ['89', '湖南', '株洲', '3616.81', '4.5%'],\n",
       " ['90', '安徽', '滁州', '3610', '5.5%'],\n",
       " ['91', '河北', '廊坊', '3565.3', '2.1%'],\n",
       " ['92', '福建', '宁德', '3554.62', '10.7%'],\n",
       " ['93', '江西', '宜春', '3473.12', '5.3%'],\n",
       " ['94', '河南', '新乡', '3463.98', '5.3%'],\n",
       " ['95', '四川', '宜宾', '3427.84', '4.5%'],\n",
       " ['96', '山东', '威海', '3408.18', '1.5%'],\n",
       " ['97', '甘肃', '兰州', '3343.5', '0.8%'],\n",
       " ['98', '内蒙古', '呼和浩特', '3329.1', '2.6%'],\n",
       " ['99', '福建', '龙岩', '3314.47', '5.0%'],\n",
       " ['100', '江西', '上饶', '3309.7', '5.1%']]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the rows scraped from the second page.\n",
    "urli2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "76041ed7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0    1     2         3        4\n",
      "0     排名   省市    城市   GDP（亿元）  实际增速（%）\n",
      "1      1   上海    上海   44652.8    -0.2%\n",
      "2      2   北京    北京   41610.9     0.7%\n",
      "3      3   广东    深圳  32387.68     3.3%\n",
      "4      4   重庆    重庆  29129.03     2.6%\n",
      "..   ...  ...   ...       ...      ...\n",
      "96    96   山东    威海   3408.18     1.5%\n",
      "97    97   甘肃    兰州    3343.5     0.8%\n",
      "98    98  内蒙古  呼和浩特    3329.1     2.6%\n",
      "99    99   福建    龙岩   3314.47     5.0%\n",
      "100  100   江西    上饶    3309.7     5.1%\n",
      "\n",
      "[101 rows x 5 columns]\n"
     ]
    }
   ],
   "source": [
    "# Build a DataFrame from the rows of the second page. The page's header row\n",
    "# stays in the data, so the columns carry pandas' default integer labels.\n",
    "x = pd.DataFrame(urli2)\n",
    "# Leave the frame as the last expression so the notebook renders it as a table.\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "39a23d59",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the second page's data to the existing workbook as Sheet2.\n",
    "# if_sheet_exists='replace' keeps this cell re-runnable: with the default\n",
    "# ('error') a second run raises ValueError because Sheet2 is already there.\n",
    "with pd.ExcelWriter('GDP排名.xlsx', mode='a', engine=\"openpyxl\", if_sheet_exists='replace') as writer:\n",
    "    x.to_excel(writer, sheet_name='Sheet2', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
